#!/bin/bash

# DESC: k-fold cross validation simulation. 

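# USAGE: kfold_CV.sc data_labels_file data_in_file data_out_file kfold_CV_dat_file run_script postproc_script
#
#     data_labels_file   row-oriented file; split into train_/test_ copies for every fold
#     data_in_file       row-oriented file; split into train_/test_ copies for every fold
#     data_out_file      row-oriented file; split into train_/test_ copies for every fold
#     kfold_CV_dat_file  one fold per line: "<first_row> <last_row>" (space separated);
#                        those rows become the test set, all other rows the training set
#     run_script         copied into each fold directory (1, 2, ...) and sourced from there
#     postproc_script    sourced once after all folds, with the fold count as its argument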

#=========================================================

# =====
# PARAMETERS
# =====

# NOTE: These are all read from the command line.

# Positional arguments, in the order given in USAGE above.
data_labels_file="$1"
data_in_file="$2"
data_out_file="$3"
kfold_CV_dat_file="$4"
run_script="$5"
postproc_script="$6"

# Warn on stderr when arguments are missing. The script deliberately keeps
# going (as it always has); later steps report their own failures.
if (( $# < 6 )); then
    printf 'warning: expected 6 arguments, got %d\n' "$#" >&2
    printf 'usage: %s data_labels_file data_in_file data_out_file kfold_CV_dat_file run_script postproc_script\n' "${0##*/}" >&2
fi

# Echo the configuration. The values are quoted so that names containing
# whitespace or glob characters are printed verbatim instead of being
# word-split / expanded by the shell.
printf '%s\n' "data_labels_file is  =  ${data_labels_file}"
printf '%s\n' "data_in_file is      =  ${data_in_file}"
printf '%s\n' "data_out_file is     =  ${data_out_file}"
printf '%s\n' "kfold_CV_dat_file is =  ${kfold_CV_dat_file}"
printf '%s\n' "run_script is        =  ${run_script}"
printf '%s\n' "postproc_script is   =  ${postproc_script}"

#=========================================================

# =====
# k-FOLD CROSS VALIDATION
# =====

# Number of folds processed so far; also used as the fold directory name
# and finally handed to the post-processing script.
cnt=0

# READ EACH LINE OF k-FOLD CV FILE
#
# Each non-blank line is "<first_row> <last_row>"; both values are used
# verbatim as a sed address range. The file is read on fd 3 (not stdin) so
# that the run script, which inherits stdin, cannot swallow the remaining
# fold definitions. The "|| [[ -n ... ]]" part keeps a final line that has
# no trailing newline.
while read -r -u 3 fold_s fold_e _ || [[ -n "$fold_s" ]]; do

    # Blank lines are not folds.
    [[ -n "$fold_s" ]] || continue

    # A line with a single value describes a one-row fold.
    fold_e=${fold_e:-$fold_s}

    cnt=$((cnt + 1))

    # SETUP FOLD DIRECTORY (start from a clean directory every time)
    dir_name=$cnt
    rm -rf -- "$dir_name"
    if ! mkdir -- "$dir_name"; then
        printf 'error: cannot create fold directory %s\n' "$dir_name" >&2
        exit 1
    fi

    echo "$fold_s $fold_e"

    # CREATE TRAINING AND TESTING FILES
    # Rows fold_s..fold_e form the test set; every other row is training data.
    # Output names use the base name so inputs given with a directory
    # component still land inside the fold directory.
    for data_file in "$data_labels_file" "$data_in_file" "$data_out_file"; do
        base_name=${data_file##*/}
        sed -e "${fold_s},${fold_e}d" -- "$data_file" > "$dir_name/train_$base_name"
        sed -n -e "${fold_s},${fold_e}p" -- "$data_file" > "$dir_name/test_$base_name"
    done

    # COPY RUN SCRIPT
    cp -- "$run_script" "$dir_name/"

    # EXECUTE RUN SCRIPT
    # Sourced in a subshell: the working directory change and anything the
    # run script defines (or an 'exit' inside it) stay confined to this fold.
    # Its output goes to stdout rather than being executed as a command, and
    # fd 3 is closed so it cannot touch the fold file.
    run_name=${run_script##*/}
    ( cd "$dir_name" && source "./$run_name" ) 3<&-

done 3< "$kfold_CV_dat_file"

# POST-PROCESSING: sourced in a subshell with the number of folds as $1.
( source "$postproc_script" "$cnt" )
